“Files Used:”

Z:/COVID-19_WastewaterAnalysis/data/processed/MMSD_Interceptor_Cases_2_7_22.csv

Z:/COVID-19_WastewaterAnalysis/data/processed/LIMSWasteData_02-09-22.csv

MadDF <- filter(FullDF,Site=="Madison")%>%
    mutate(FlagedOutliers = IdentifyOutliers(N1,Bin = 21, Action = "Flag"),
           #Manual flagging that method misses due to boundary effect with binning
           FlagedOutliers = ifelse(Date == mdy("01/26/2021") | Date == mdy("01/27/2021"),
                                   TRUE, FlagedOutliers),
           NoOutlierVar = ifelse(FlagedOutliers, NA, N1))

OutlierDF <- MadDF%>%
  filter(FlagedOutliers)

FullDFMassRatio <- FullDF%>%
  filter(!is.na(N1))%>%
  mutate(SC2_mass = (3.785*1e6)*FlowRate*N1)


MissingIntercepter <- FullDFMassRatio%>%
  filter(Site!="Madison",!is.na(FlowRate)|!is.na(N1))%>%
  group_by(Date)%>%
  summarize(N = n())%>%
  filter(N!=5)


FullDFMassRatio.Mad <- FullDFMassRatio%>%
  filter(!is.na(SC2_mass))%>%
  filter(Site=="Madison")
FullDFMassRatio.Inter <- FullDFMassRatio%>%
  filter(!is.na(SC2_mass))%>%
  filter(Site!="Madison")#TempErrorMetric

TempErrorMetric <- FullDFMassRatio.Inter%>%
  group_by(Date)%>%
  summarise(InterSum = sum(SC2_mass),InterSumFlow = sum(FlowRate))%>%
  inner_join(FullDFMassRatio.Mad)%>%
  mutate(MassRatio = SC2_mass/InterSum,
         ErrorRatio = 2*(SC2_mass-InterSum)/(SC2_mass+InterSum),
         MassRatioFlow = FlowRate/InterSumFlow,
         ErrorRatioFlow = 2*(FlowRate-InterSumFlow)/(FlowRate+InterSumFlow))


FullDFMassRatio.Mad <- FullDFMassRatio%>%
  filter(!is.na(SC2_mass))%>%
  filter(Site=="Madison")%>%
  inner_join(FullDFMassRatio.Inter["Date"])%>%
  unique()

FullDFMassRatio.Inter <- FullDFMassRatio%>%
  filter(Site!="Madison")
KeyOulierPoints <- c(ymd("2021-06-08"),
                     ymd("2021-10-17"),
                     ymd("2021-05-02"),
                     ymd("2021-01-26"))

NonOuliersDF <- MadDF%>%
  mutate(Outlier = ifelse(FlagedOutliers,N1,NA))%>%
           mutate(N1 = NoOutlierVar)%>%
            filter(!(is.na(N1)))

OutLierPlotDF <- MadDF%>%
  mutate(Outlier = ifelse(FlagedOutliers,N1,NA))%>%
           mutate(N1 = NoOutlierVar)%>%
  filter(!(is.na(N1)&is.na(Outlier)))%>%
  ggplot(aes(x=Date))+#Data depends on time 
  geom_point(aes(y=N1,
                color="N1", 
                info = N1),data=NonOuliersDF,size=.5)+#compares N1
  geom_point(aes(y=Outlier,
                 color="N1 Outlier",
                 info = Outlier))+
  theme_light() +
  scale_color_manual(values=c("#F8766D","#999999"))

ggplotly(OutLierPlotDF)
#"2021-06-08","2021-10-17","2021-05-02","2021-01-26"

Sum of interceptor flows agrees with composite flow except for dates with missing flow information for some interceptors. Purple boxes beneath data are to communicate the Dates with interceptor data partially missing.

MissingFlowData <- FullDFMassRatio.Inter%>%
  group_by(Date)%>%
  summarise(n=n())%>%
  filter(n!=5)%>%
  mutate(MissingSite=TRUE)


FlowPlot <- FullDFMassRatio.Inter%>%
  full_join(MissingFlowData)%>%
  mutate(MissingSite=ifelse(is.na(MissingSite),FALSE,MissingSite))%>%
  filter(!is.na(FlowRate))%>%
  ggplot()+
  geom_col(aes(x=Date,y=FlowRate,fill=Site,shape=MissingSite),position="stack", stat="identity", width = 3) + 
  geom_col(aes(x=Date,y=-.5),data=MissingFlowData,color="Purple",fill="Purple", width = 3)+
  scale_fill_brewer(palette="Spectral") +
  scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c", "#5c9c43", "#3a56d6")) +
  theme_light() +
  geom_point(data=FullDFMassRatio.Mad,aes(x=Date,y=FlowRate),size=2)+
  ylab("Average daily flow (MGD)") +
  geom_hline(yintercept=median(FullDFMassRatio.Mad$FlowRate), linetype = "dashed", color="black") +
  ggtitle("Average daily flow (MMSD)")

ggplotly(FlowPlot)%>%
  add_annotations( text="Site, Full Data For Day", xref="paper", yref="paper",
                  x=1.02, xanchor="left",
                  y=0.8, yanchor="bottom",    # Same y as legend below
                  legendtitle=TRUE, showarrow=FALSE ) %>%
  layout( legend=list(y=0.8, yanchor="top" ) )
MassBalencePlot <- FullDFMassRatio.Inter%>%
  full_join(MissingFlowData)%>%
  mutate(MissingSite=ifelse(is.na(MissingSite),FALSE,MissingSite))%>%
  #filter(!is.na(FlowRate))%>%
  #filter(!MissingSite)%>%
  ggplot()+
  geom_col(aes(x=Date,y=SC2_mass,fill=Site,shape=MissingSite),position="stack", stat="identity", width = 3) + 
  geom_col(aes(x=Date,y=-3e12),data=MissingFlowData,color="Purple",fill="Purple", width = 3)+
  scale_fill_brewer(palette="Spectral") +
  scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c", "#5c9c43", "#3a56d6")) +
  theme_light() +
  geom_point(data=FullDFMassRatio.Mad,aes(x=Date,y=SC2_mass),size=1)+
  ylab("N1/N2 gene copies per day") +
  
  ggtitle("SARS-CoV-2, 24h Mass Loading (Madison)")
  

ggplotly(MassBalencePlot)
InterceptRatioPlot <- FullDFMassRatio.Inter%>%
  group_by(Date)%>%
  summarise(SC2_InterSum = sum(SC2_mass,na.rm = TRUE),
            nIntercepter = n())%>%
  inner_join(FullDFMassRatio)%>%
  mutate(Ratio = (SC2_mass/SC2_InterSum)*(nIntercepter/5))%>%
  arrange(Ratio)%>%
  select(Ratio,nIntercepter, everything())%>%
  ggplot(aes(x=Date))+
  geom_point(aes(y= Ratio,color = Site))
#ggplotly(InterceptRatioPlot)


FullDFMassRatio.Inter%>%
  group_by(Date)%>%
  summarise(SC2_InterSum = sum(SC2_mass,na.rm = TRUE),
            nIntercepter = n())%>%
  inner_join(FullDFMassRatio.Mad)%>%
  mutate(Ratio = (SC2_mass/SC2_InterSum)*(nIntercepter/5))%>%
  arrange(Ratio)%>%
  select(Ratio,nIntercepter, everything())#%>%
## # A tibble: 104 x 16
##    Ratio nIntercepter Date       SC2_InterSum Site    FirstConfirmed
##    <dbl>        <int> <date>            <dbl> <chr>            <int>
##  1 0.184            3 2021-06-17      7.19e12 Madison             NA
##  2 0.195            4 2021-06-28      7.44e12 Madison             NA
##  3 0.279            4 2021-05-27      2.82e13 Madison             NA
##  4 0.289            4 2021-07-01      4.00e12 Madison              6
##  5 0.301            5 2021-07-19      4.63e12 Madison             17
##  6 0.313            5 2021-05-20      9.76e13 Madison             18
##  7 0.402            5 2021-07-22      2.62e13 Madison             33
##  8 0.406            5 2021-04-22      7.19e12 Madison             53
##  9 0.409            5 2021-03-11      1.55e13 Madison             50
## 10 0.473            5 2021-04-05      2.54e13 Madison             29
## # ... with 94 more rows, and 10 more variables: SevenDayMACases <dbl>,
## #   N1 <dbl>, BCoV <dbl>, N2 <dbl>, PMMoV <dbl>, GeoMeanN12 <dbl>,
## #   FlowRate <dbl>, temperature <dbl>, equiv_sewage_amt <dbl>, SC2_mass <dbl>
  #summarise(mean(Ratio))
SizeUsed = 1.5
alphaUsed = .9
N1ShapeUnit = 4
N2ShapeUnit = 5

IntercepterDF <- FullDF %>%
  mutate(Pop = case_when(
    Site=="MMSD-P2" ~ 111967,
    Site=="MMSD-P7" ~ 81977,
    Site=="MMSD-P8" ~ 127634,
    Site=="MMSD-P11" ~ 130799,
    Site=="MMSD-P18" ~ 151470,
    Site=="Madison" ~ 603847,
  ))%>%
  filter(Site != "Madison") #We are looking for agreement between the interceptors
                            #after some normalization so Madison just distracts


IntercepterOverLay <- IntercepterDF%>%
  ggplot(aes(x=Date))+
  geom_point(aes(y=N1,color = Site),size = SizeUsed, alpha= alphaUsed,shape=N1ShapeUnit)+
  geom_point(aes(y=N2,color = Site),size = SizeUsed, alpha= alphaUsed,shape=N2ShapeUnit)+
  scale_y_log10()

ggplotly(IntercepterOverLay)
IntercepterOverLayFlow <- IntercepterDF%>%
  ggplot(aes(x=Date))+
  geom_point(aes(y=N1*FlowRate,color = Site),size = SizeUsed, alpha= alphaUsed,shape=N1ShapeUnit)+
  geom_point(aes(y=N2*FlowRate,color = Site),size = SizeUsed, alpha= alphaUsed,shape=N2ShapeUnit)+
  scale_y_log10()

ggplotly(IntercepterOverLayFlow)
IntercepterOverLayPop <- IntercepterDF%>%
  ggplot(aes(x=Date))+
  geom_point(aes(y=N1/Pop,color = Site),size = SizeUsed, alpha= alphaUsed,shape=N1ShapeUnit)+
  geom_point(aes(y=N2/Pop, color = Site),size = SizeUsed, alpha= alphaUsed,shape=N2ShapeUnit)+
  scale_y_log10()

ggplotly(IntercepterOverLayPop)
#Shape pch=# arg. might be diffrent


IntercepterDF%>%
  group_by(Date)%>%
  mutate(Ranking = rank(N1))%>%
  group_by(Site)%>%
  summarize(SiteMeaning = mean(Ranking, na.rm = TRUE))
## # A tibble: 5 x 2
##   Site     SiteMeaning
##   <chr>          <dbl>
## 1 MMSD-P11        1.42
## 2 MMSD-P18        2.41
## 3 MMSD-P2         3.00
## 4 MMSD-P7         3.75
## 5 MMSD-P8         4.37
IntercepterDF%>%
  group_by(Date)%>%
  mutate(Ranking = rank(N1*FlowRate))%>%
  group_by(Site)%>%
  summarize(SiteMeaningFlow = mean(Ranking, na.rm = TRUE))
## # A tibble: 5 x 2
##   Site     SiteMeaningFlow
##   <chr>              <dbl>
## 1 MMSD-P11            1.51
## 2 MMSD-P18            2.55
## 3 MMSD-P2             2.96
## 4 MMSD-P7             3.58
## 5 MMSD-P8             4.36
IntercepterDF%>%
  group_by(Date)%>%
  mutate(Ranking = rank(N1/Pop))%>%
  group_by(Site)%>%
  summarize(SiteMeaningPop = mean(Ranking, na.rm = TRUE))
## # A tibble: 5 x 2
##   Site     SiteMeaningPop
##   <chr>             <dbl>
## 1 MMSD-P11           1.37
## 2 MMSD-P18           2.31
## 3 MMSD-P2            3.05
## 4 MMSD-P7            3.92
## 5 MMSD-P8            4.32
length(IntercepterDF$Date)
## [1] 2536
IntercepterDF%>%
  ggplot()+
  geom_histogram(aes(x=log(N1)))+
  facet_wrap(~Site)

InterceptRatioDF <- FullDF%>%
  filter(Site!="Madison")%>%
  group_by(Date)%>%
  filter(!is.na(N1))%>%
  summarise(SC2_InterSum_N1 = sum(N1*FlowRate,na.rm = TRUE),
            SC2_InterSum_N2 = sum(N2*FlowRate,na.rm = TRUE),
            nIntercepter = n())%>%
  full_join(FullDF)%>%
  filter(!is.na(N1))%>%
  mutate(RatioN1 = (N1*FlowRate/SC2_InterSum_N1)*(nIntercepter/5),
         RatioN2 = (N2*FlowRate/SC2_InterSum_N2)*(nIntercepter/5))%>%
  select(RatioN1,RatioN2,nIntercepter, everything())

InterceptRatioPlot <- InterceptRatioDF%>%
  ggplot(aes(x=Date))+
  geom_point(aes(y=RatioN1,color = Site),size = SizeUsed, alpha= alphaUsed,shape=15)+
  geom_point(aes(y=RatioN2-5,color = Site),size = SizeUsed, alpha= alphaUsed,shape=19)
ggplotly(InterceptRatioPlot)
InterceptRatioPlotFlow <- InterceptRatioDF%>%
  ggplot(aes(x=Date))+
  geom_point(aes(y=RatioN1/FlowRate,color = Site),size = SizeUsed, alpha= alphaUsed,shape=15)+
  geom_point(aes(y=RatioN2/FlowRate-.3,color = Site),size = SizeUsed, alpha= alphaUsed,shape=19)
ggplotly(InterceptRatioPlot)
InterceptRatioPlotPop <- InterceptRatioDF%>%
  mutate(Pop = case_when(
    Site=="MMSD-P2" ~ 111967,
    Site=="MMSD-P7" ~ 81977,
    Site=="MMSD-P8" ~ 127634,
    Site=="MMSD-P11" ~ 130799,
    Site=="MMSD-P18" ~ 151470
  ))%>%
  ggplot(aes(x=Date))+
  geom_point(aes(y=RatioN1/Pop,color = Site),size = SizeUsed, alpha= alphaUsed,shape=15)+
  geom_point(aes(y=RatioN2/Pop-.3,color = Site),size = SizeUsed, alpha= alphaUsed,shape=19)
ggplotly(InterceptRatioPlot)
IntercepterOverLay <- FullDFMassRatio.Inter%>%
  complete(Date,Site)%>%
  full_join(FullDFMassRatio)%>%
  ggplot(aes(x=Date))+
  geom_point(aes(y=N1,color = Site),alpha=.8)+
  scale_y_log10()
ggplotly(IntercepterOverLay)
N1BalencePlot <- FullDFMassRatio.Inter%>%
  complete(Date,Site)%>%
  ggplot()+
  geom_col(aes(x=Date,y=N1,fill=Site),
           position = "dodge", stat="identity", width = 2) + 
  geom_col(aes(x=Date,y=-3e3),
           data=MissingFlowData,color="Purple",fill="Purple", width = 2)+
  scale_fill_brewer(palette="Spectral") +
  scale_color_manual(values=c("#d60000", "#ffa600", "#7ed05c", "#5c9c43", "#3a56d6")) +
  theme_light() +
  geom_point(data=FullDFMassRatio.Mad,aes(x=Date,y=N1),size=1)+
  scale_y_log10()+
  ylab("N1 gene copies per day") +
  
  ggtitle("SARS-CoV-2, 24h Mass Loading (Madison)")
  

ggplotly(N1BalencePlot)
FullDFMassRatio.Inter%>%
  group_by(Site)%>%
  summarise(N1 = mean(N1,na.rm=TRUE),
            Flow = mean(FlowRate,na.rm=TRUE),
            SC2 = mean(SC2_mass,na.rm=TRUE),
            AntiSC2 = mean(N1/FlowRate,na.rm=TRUE))#%>%
## # A tibble: 5 x 5
##   Site          N1  Flow     SC2 AntiSC2
##   <chr>      <dbl> <dbl>   <dbl>   <dbl>
## 1 MMSD-P11 294443.  8.58 9.04e12  34459.
## 2 MMSD-P18 403384. 11.2  1.69e13  36331.
## 3 MMSD-P2  313275.  5.85 6.57e12  53950.
## 4 MMSD-P7  356002.  4.40 5.86e12  87100.
## 5 MMSD-P8  383066.  6.27 8.66e12  61481.
  #ungroup()%>%
  #summarise(VarN1 = var(N1)/mean(N1)^2,
  #          VarFlow = var(Flow)/mean(Flow)^2,
  #          VarSC2 = var(SC2)/mean(SC2)^2,
  #          VarASC2 = var(AntiSC2)/mean(AntiSC2)^2)


#Graph paportion of data intercepters are

arranged plots with lines to communicate where flagged outliers are. Red Lines show Where there was a flagged on a day that also has collected interceptor data.

StartRange <- mdy("1-20-2021")
EndRange <- mdy("1-10-2022")

AllOuliersDates <- OutlierDF[["Date"]]
OutlierDatesIntercepters <- unique(inner_join(FullDFMassRatio.Inter,OutlierDF["Date"],by=c("Date"))$Date)

a <- OutLierPlotDF+
  scale_x_date(limits = c(StartRange,EndRange))+
  #geom_vline(xintercept = AllOuliersDates)+
  geom_vline(xintercept = OutlierDatesIntercepters,color="Red")+
  #geom_vline(xintercept = MissingIntercepter$Date,color="Red")+
  theme(title = element_blank(),
        axis.text.x = element_blank(), 
        axis.title.x = element_blank(),
        legend.position="none")
b <- MassBalencePlot+
  scale_x_date(limits = c(StartRange,EndRange))+
  #geom_vline(xintercept = (AllOuliersDates))+
  geom_vline(xintercept = (OutlierDatesIntercepters),color="Red")+
  #geom_vline(xintercept = MissingIntercepter$Date,color="Red")+
  theme(title = element_blank(),
    axis.text.x = element_blank(), 
        axis.title.x = element_blank(),
    legend.position="none")
c <- FlowPlot+
  scale_x_date(limits = c(StartRange,EndRange))+
  #geom_vline(xintercept = AllOuliersDates)+
  geom_vline(xintercept = OutlierDatesIntercepters,color="Red")+
  theme(title = element_blank(),
        legend.position="none")



ggarrange(a,b,c,nrow=3,align ="v",legend.grob=get_legend(MassBalencePlot),legend="right")

#Cause of missing data
OutlierDatesIntercepters
## [1] "2021-04-15" "2021-04-26" "2021-07-26" "2021-12-13"
N_One_Outliers <- c(ymd("2021-04-15"), ymd("2021-04-26"), ymd("2021-07-26"), ymd("2021-12-13"))
N_TWO_Outliers <- c(ymd("2021-04-15"), ymd("2021-05-20"))
LowRatioN_One <- c("2021-04-15","2021-07-26","2021-12-13","2021-05-31","2021-03-22")
LowRatioN_TWO <- c("2021-04-15","2021-07-26","2021-12-13","2021-10-04","2021-03-22")

AllIntercepterComp_POI <- c(N_One_Outliers,N_TWO_Outliers,LowRatioN_One,LowRatioN_TWO)

FullDF%>%
  filter(Date %in% AllIntercepterComp_POI)%>%
  select(-FirstConfirmed,SevenDayMACases)
##          Date     Site SevenDayMACases      N1   BCoV      N2     PMMoV
## 1  2021-03-22 MMSD-P11        3.600000   74718  7.233   34380  13995222
## 2  2021-03-22 MMSD-P18        3.600000   87689  9.071   92614   5087805
## 3  2021-03-22  MMSD-P2        6.000000   41663  6.174   21141  12906803
## 4  2021-03-22  MMSD-P7        7.400000   43453  0.741   55333  13088946
## 5  2021-03-22  MMSD-P8        8.600000   51290  5.706   44260  19118442
## 6  2021-04-15 MMSD-P11       11.285714  157424  3.626  126684  45532010
## 7  2021-04-15 MMSD-P18       10.142857      NA     NA      NA        NA
## 8  2021-04-15  MMSD-P2       10.285714   48521  3.533   48080  26162914
## 9  2021-04-15  MMSD-P7       11.428571      NA     NA      NA        NA
## 10 2021-04-15  MMSD-P8       11.857143   60518  5.001   54023  20604026
## 11 2021-04-26 MMSD-P11        4.857143  173731  8.414  121556  32400873
## 12 2021-04-26 MMSD-P18        5.333333 1761146 19.407 2270206  18185986
## 13 2021-04-26  MMSD-P2        5.833333   66599 17.244   84380 169992062
## 14 2021-04-26  MMSD-P7        7.666667  145908 12.930  109214  13107287
## 15 2021-04-26  MMSD-P8        8.166667  181856 11.324  194280  20215977
## 16 2021-05-20 MMSD-P11        2.714286    9997  2.871   29627  66800100
## 17 2021-05-20 MMSD-P18        2.857143 1945496  6.505 2297335  15415932
## 18 2021-05-20  MMSD-P2        3.571429  608010  9.994  676281  44245555
## 19 2021-05-20  MMSD-P7        3.571429   56388  2.936   58965  32473853
## 20 2021-05-20  MMSD-P8        3.000000   15247  5.676   17264  18441997
## 21 2021-05-31 MMSD-P11        1.428571    3972  3.353      NA  31740972
## 22 2021-05-31 MMSD-P18        1.333333  120744  4.302  168461  31643578
## 23 2021-07-26 MMSD-P11        4.428571   56082  1.730   57844  34062331
## 24 2021-07-26 MMSD-P18        3.714286   96259  4.820   65563  11582896
## 25 2021-07-26  MMSD-P2        5.428571   87920  7.879   86185  42669683
## 26 2021-07-26  MMSD-P7        6.285714   22234  8.842   18735  21276643
## 27 2021-07-26  MMSD-P8        7.571429   14820  9.955   17825  24223424
## 28 2021-10-04 MMSD-P11       21.857143  132758  3.220  122553  30769148
## 29 2021-10-04 MMSD-P18       25.142857  100502  0.800   81696   9759807
## 30 2021-10-04  MMSD-P2       26.285714  220932  3.560  170892  13670096
## 31 2021-10-04  MMSD-P7       29.285714  137332  3.210  101734  14498503
## 32 2021-10-04  MMSD-P8       27.857143  172914  1.680  128467  20682700
## 33 2021-12-13 MMSD-P11       19.000000  458847 16.130  326182  86006923
## 34 2021-12-13 MMSD-P18       21.000000  722770 12.720  887123  13380021
## 35 2021-12-13  MMSD-P2       34.000000  429555  2.325  475306  33305288
## 36 2021-12-13  MMSD-P7       49.285714  300888  2.231  499546  31784189
## 37 2021-12-13  MMSD-P8       56.285714  443364  8.883  456116  18625898
## 38 2021-03-22  Madison       35.000000  162650  1.035  121319  16649039
## 39 2021-04-15  Madison       50.285714  571269  3.684  706418  23628196
## 40 2021-04-26  Madison       39.333333  675748 16.079  546554  28619982
## 41 2021-05-20  Madison       19.666667  224021 14.337  285371  49311472
## 42 2021-05-31  Madison        6.000000  147362  3.645  171524  37833422
## 43 2021-07-26  Madison       43.333333  186460  3.589  143034  30230924
## 44 2021-10-04  Madison       90.000000  244225  2.350  283059  12107928
## 45 2021-12-13  Madison      229.333333 1498471 13.886 1515771  24525524
## 46 2021-05-31  MMSD-P2              NA   13114  4.599   15239  29170047
## 47 2021-05-31  MMSD-P7              NA    8117  1.276      NA  30386513
## 48 2021-05-31  MMSD-P8              NA   58037  5.907   46865  19836360
##    GeoMeanN12 FlowRate temperature equiv_sewage_amt
## 1       74718     9.02          NA            1.000
## 2       87689    11.52          NA            1.000
## 3       41663     5.81          NA            1.000
## 4       43453     3.67          NA            1.000
## 5       51290     6.18          NA            1.000
## 6      157424     8.91          NA            1.000
## 7          NA       NA          NA               NA
## 8       48521     6.33          NA            1.000
## 9          NA       NA          NA               NA
## 10      60518     6.78          NA            1.000
## 11     173731     8.62          NA            0.250
## 12    1761146    10.79          NA            0.250
## 13      66599     5.84          NA            0.250
## 14     145908     4.41          NA            0.250
## 15     181856     6.38          NA            0.250
## 16       9997     8.61          NA            0.625
## 17    1945496    11.32          NA            0.625
## 18     608010     5.52          NA            0.625
## 19      56388     3.65          NA            0.625
## 20      15247     6.88          NA            0.625
## 21       3972     7.88          NA            0.625
## 22     120744     9.98          NA            0.625
## 23      56082     8.44          NA            1.000
## 24      96259    10.28          NA            1.000
## 25      87920     5.74          NA            1.000
## 26      22234     4.52          NA            1.000
## 27      14820     5.95          NA            1.000
## 28     132758     8.47          NA            0.625
## 29     100502    10.08          NA            0.625
## 30     220932     6.74          NA            0.625
## 31     137332     4.35          NA            0.625
## 32     172914     6.27          NA            0.625
## 33     458847     8.16          NA            0.625
## 34     722770    10.74          NA            0.625
## 35     429555     6.14          NA            0.625
## 36     300888     4.65          NA            0.625
## 37     443364     6.09          NA            0.625
## 38     162650    36.20          NA            1.000
## 39     571269    39.02          NA            1.000
## 40     675748    36.04          NA            0.250
## 41     224021    35.98          NA            0.625
## 42     147362    32.02          NA            0.625
## 43     186460    34.93          NA            1.000
## 44     244225    35.91          NA            0.625
## 45    1498471    35.78          NA            0.625
## 46      13114     4.74          NA            0.625
## 47       8117     3.91          NA            0.625
## 48      58037     5.51          NA            0.625